Inicio al modelaje de datos

OBJETIVO

  • Crear una regresión lineal

REQUISITOS

  1. Contar con RStudio.
  2. Usar la carpeta de trabajo Sesion08/Reto-01

DESARROLLO

Usaremos la base de datos mtcars y crearemos un scatter.smooth para ver la relación entre peso (variable wt) y millas por galón (variable mpg). Después revisaremos la correlación entre ambas variables y, finalmente, crearemos un modelo lineal, lo visualizaremos y obtendremos su resumen.

Usaremos la base de datos “mtcars”, ya disponible en R. Mediante un diagrama de dispersión (scatterplot) podremos ver la relación entre peso y millas por galón. La función para crearlo es scatter.smooth(x, y), y podemos añadirle un título al gráfico con el argumento main = "".

library(corrplot)
corrplot 0.84 loaded

Luego buscaremos la correlación entre ambas variables:

cor(mtcars$wt, mtcars$mpg)
[1] -0.8676594
modlin <- lm(wt ~ mpg, data=mtcars)  
print(modlin)

Call:
lm(formula = wt ~ mpg, data = mtcars)

Coefficients:
(Intercept)          mpg  
     6.0473      -0.1409  
summary(modlin)

Call:
lm(formula = wt ~ mpg, data = mtcars)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6516 -0.3490 -0.1381  0.3190  1.3684 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  6.04726    0.30869  19.590  < 2e-16 ***
mpg         -0.14086    0.01474  -9.559 1.29e-10 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4945 on 30 degrees of freedom
Multiple R-squared:  0.7528,    Adjusted R-squared:  0.7446 
F-statistic: 91.38 on 1 and 30 DF,  p-value: 1.294e-10
## Correlation matrix of all columns of the built-in mtcars data set;
## every plot below is drawn from M.
data("mtcars")
M <- cor(mtcars)

## Colour-ramp generators: calling colN(n) returns n colours interpolated
## through the stops listed for that palette.
col1 <- colorRampPalette(
  c(
    "#7F0000", "red", "#FF7F00", "yellow", "white",
    "cyan", "#007FFF", "blue", "#00007F"
  )
)
col2 <- colorRampPalette(
  c(
    "#67001F", "#B2182B", "#D6604D", "#F4A582",
    "#FDDBC7", "#FFFFFF", "#D1E5F0", "#92C5DE",
    "#4393C3", "#2166AC", "#053061"
  )
)
col3 <- colorRampPalette(c("red", "white", "blue"))
col4 <- colorRampPalette(
  c(
    "#7F0000", "red", "#FF7F00", "yellow", "#7FFF7F",
    "cyan", "#007FFF", "blue", "#00007F"
  )
)

## Fixed two-colour (white/black) palette.
wb <- c("white", "black")

## Ask for confirmation before each new plot page (interactive devices).
par(ask = TRUE)

## --- coefficients shown as numbers ------------------------------------
## black numbers, no colour legend
corrplot(corr = M, method = "number", cl.pos = "n", col = "black")

## numbers in the default colours
corrplot(corr = M, method = "number")

## --- default display, then "AOE" ordering -----------------------------
corrplot(corr = M)

corrplot(corr = M, order = "AOE")

## grey coefficients added on top of the glyphs
corrplot(corr = M, addCoef.col = "grey", order = "AOE")

## --- palette col1 at two resolutions ----------------------------------
corrplot(corr = M, col = col1(20), cl.length = 21, addCoef.col = "grey",
         order = "AOE")

corrplot(corr = M, col = col1(10), addCoef.col = "grey", order = "AOE")


## --- palette col2 at several resolutions ------------------------------
corrplot(corr = M, col = col2(200), order = "AOE")

corrplot(corr = M, col = col2(200), addCoef.col = "grey", order = "AOE")

corrplot(corr = M, col = col2(20), cl.length = 21, addCoef.col = "grey",
         order = "AOE")

corrplot(corr = M, col = col2(10), addCoef.col = "grey", order = "AOE")


## --- palette col3 -----------------------------------------------------
corrplot(corr = M, col = col3(100), order = "AOE")

corrplot(corr = M, col = col3(10), order = "AOE")

## --- one plot per remaining display method ----------------------------
corrplot(corr = M, method = "color", order = "AOE", col = col1(20),
         cl.length = 21, addCoef.col = "grey")

corrplot(corr = M, method = "square", order = "AOE", col = col2(200))

corrplot(corr = M, method = "ellipse", order = "AOE", col = col1(200))

corrplot(corr = M, method = "shade", order = "AOE", col = col3(20))

corrplot(corr = M, method = "pie", order = "AOE")

## --- white/black palette (col = wb) -----------------------------------
## outlined glyphs, no colour legend
corrplot(corr = M, col = wb, outline = TRUE, cl.pos = "n", order = "AOE")


## White/black glyphs on a "gold2" background, reminiscent of a weiqi (Go)
## board; suitable both on screen and for black-and-white print.
corrplot(corr = M, col = wb, bg = "gold2", cl.pos = "n", order = "AOE")

## Mixed displays built from two calls each: the first call draws the upper
## triangle, the second one (add = TRUE) is added to the same plot and draws
## the lower triangle with diag = FALSE, tl.pos = "n" and cl.pos = "n".
## The function "corrplot.mixed" is a more efficient way to obtain these.

## upper: circle / lower: ellipse
corrplot(corr = M, type = "upper", tl.pos = "d", order = "AOE")
corrplot(corr = M, method = "ellipse", type = "lower", add = TRUE,
         diag = FALSE, tl.pos = "n", cl.pos = "n", order = "AOE")


## upper: circle / lower: square
corrplot(corr = M, type = "upper", tl.pos = "d", order = "AOE")
corrplot(corr = M, method = "square", type = "lower", add = TRUE,
         diag = FALSE, tl.pos = "n", cl.pos = "n", order = "AOE")


## upper: circle / lower: colourful numbers
corrplot(corr = M, type = "upper", tl.pos = "d", order = "AOE")
corrplot(corr = M, method = "number", type = "lower", add = TRUE,
         diag = FALSE, tl.pos = "n", cl.pos = "n", order = "AOE")


## upper: circle / lower: black numbers (upper call uses tl.pos = "tp")
corrplot(corr = M, type = "upper", tl.pos = "tp", order = "AOE")
corrplot(corr = M, method = "number", type = "lower", add = TRUE,
         col = "black", diag = FALSE, tl.pos = "n", cl.pos = "n",
         order = "AOE")

## Variables ordered with order = "hclust"; addrect asks for rectangles to
## be drawn on the plot and rect.col sets their colour.
corrplot(corr = M, order = "hclust")

corrplot(corr = M, addrect = 2, order = "hclust")

corrplot(corr = M, addrect = 3, rect.col = "red", order = "hclust")

corrplot(corr = M, addrect = 4, rect.col = "blue", order = "hclust")

## same as above but with an explicit clustering method
corrplot(corr = M, addrect = 4, hclust.method = "ward.D2", order = "hclust")

## Plot the absolute correlations, whose values lie in [0, 1], with the
## colour-legend limits set to that range.
## NOTE(review): later corrplot releases appear to rename cl.lim to col.lim;
## confirm before upgrading from the 0.84 version loaded above.
corrplot(corr = abs(M), cl.lim = c(0, 1), order = "AOE")

corrplot(corr = abs(M), cl.lim = c(0, 1), col = col1(20), order = "AOE")

corrplot(corr = abs(M), cl.lim = c(0, 1), col = col3(200), order = "AOE")

## Visualize a general (non-correlation) matrix with values in [-100, 100].
## The seed is fixed so the random demo matrix, and therefore the rendered
## figures, are identical every time the notebook is run.
set.seed(2020)
## 225 uniform draws in [-100, 100], rounded, arranged in 15 rows.
ran <- round(matrix(runif(225, -100, 100), nrow = 15))

## is.corr = FALSE: the input is not a correlation matrix.
corrplot(ran, is.corr = FALSE)

## Same plot with the colour-legend limits fixed to the full [-100, 100] range.
corrplot(ran, is.corr = FALSE, cl.lim = c(-100, 100))


## Text labels and plot type.

## text-label rotation (tl.srt) of 45 and 60
corrplot(corr = M, tl.srt = 45, order = "AOE")

corrplot(corr = M, tl.srt = 60, order = "AOE")

## text labels placed with tl.pos = "d"
corrplot(corr = M, tl.pos = "d", cl.pos = "n", order = "AOE")

corrplot(corr = M, tl.pos = "d", diag = FALSE, order = "AOE")

## upper triangle only, with and without the diagonal
corrplot(corr = M, type = "upper", order = "AOE")

corrplot(corr = M, type = "upper", diag = FALSE, order = "AOE")

## lower triangle only with cl.pos = "b", with and without the diagonal
corrplot(corr = M, type = "lower", cl.pos = "b", order = "AOE")

corrplot(corr = M, type = "lower", cl.pos = "b", diag = FALSE, order = "AOE")

## Animation: redraw the plot with confidence intervals while the
## significance level alpha steps from 0.1 down to 0 in decrements of 0.005.
## begin.animation
par(ask = FALSE)  # no confirmation prompt between frames
for (i in seq(0.1, 0, -0.005)) {
  ## Correlation tests on mtcars at confidence level 1 - alpha; the result
  ## supplies the p-values and the lower/upper confidence bounds used below.
  tmp <- cor.mtest(mtcars, conf.level = 1 - i)
  ## lowCI.mat / uppCI.mat are written out in full: the previous `low =` and
  ## `upp =` only worked through partial argument matching.
  corrplot(M, p.mat = tmp$p, lowCI.mat = tmp$lowCI, uppCI.mat = tmp$uppCI,
           order = "hclust", pch.col = "red", sig.level = i,
           plotCI = "rect", cl.pos = "n",
           mar = c(0, 0, 1, 0),
           title = substitute(alpha == x,
                              list(x = format(i, digits = 3, nsmall = 3))))
  Sys.sleep(0.15)  # short pause so each frame stays visible
}

## end.animation
LS0tCnRpdGxlOiAiUmV0by0wMSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQojIyBJbmljaW8gYWwgbW9kZWxhamUgZGUgZGF0b3MKCiMjIyBPQkpFVElWTwotIENyZWFyIHVuYSByZWdyZXNpb24gbGluZWFsIAoKIyMjIyBSRVFVSVNJVE9TCjEuIENvbnRhciBjb24gUiBzdHVkaW8uCjEuIFVzYXIgbGEgY2FycGV0YSBkZSB0cmFiYWpvIGBTZXNpb24wOC9SZXRvLTAxYAoKIyMjIyBERVNBUlJPTExPClVzYXJlbW9zIGxhIGJhc2UgZGUgbXRjYXJzIHkgY3JlYXJlbW9zIHVuIHNjYXR0ZXIuc21vb3RoIHBhcmEgdmVyIGxhIHJlbGFjaW9uIGVudHJlIHBlc28gKHZhcmlhYmxlIHd0KSB5IG1pbGxhcyBwb3IgZ2Fsb24gKHZhcmlhYmxlIG1wZykuIERlc3B1ZXMgY2hlY2FyZW1vcyBsYSBjb3JyZWxhY2lvbiBlbnRyZSBhbWJhcyB2YXJpYWJsZXMgeSBmaW5hbG1lbnRlIGNyZWFyZW1vcyB1biBtb2RlbG8gbGluZWFsLCBsbyB2aXN1YWxpemFyZW1vcyB5IHNhY2FyZW1vcyBzdSByZXN1bWVuLiAKClVzYXJlbW9zIGxhIGJhc2UgZGUgImNhcnMiIHlhIGRpc3BvbmlibGUgZW4gUi4gTWVkaWFudGUgdW4gc2NhdHRlcnBsb3QgcG9kcmVtb3MgdmVyIGxhIHJlbGFjaW9uIGVudHJlIGRpc3RhbmNpYSB5IHZlbG9jaWRhZC4gTGEgZnVuY2lvbiBwYXJhIGNyZWFybG8gZXMgc2NhdHRlci5zbW9vdGgoeCx5KSB5IHBvZGVtb3MgYW5hZGlybGUgdW4gdGl0dWxvIGFsIGdyYWZpY28gY29uIGxhIGZ1bmNpb24sIG1haW49IiIgCmBgYHtyfQpsaWJyYXJ5KGNvcnJwbG90KQpoZWFkKG10Y2FycykKc2NhdHRlci5zbW9vdGgoeD1tdGNhcnMkd3QsIHk9bXRjYXJzJG1wZywgbWFpbj0iUGVzbyB5IE1pbGxhcyBwb3IgZ2Fsb24iKQpgYGAKCgpMdWVnbyBidXNjYXJlbW9zIGxhIGNvcnJlbGFjaW9uIGVudHJlIGFtYmFzIHZhcmlhYmxlczogCmBgYHtyfQpjb3IobXRjYXJzJHd0LCBtdGNhcnMkbXBnKQpgYGAKCgpgYGB7cn0KbW9kbGluIDwtIGxtKHd0IH4gbXBnLCBkYXRhPW10Y2FycykgIApwcmludChtb2RsaW4pCnN1bW1hcnkobW9kbGluKQpgYGAKYGBge3J9CmRhdGEobXRjYXJzKQpNIDwtIGNvcihtdGNhcnMpCgojIyAgZGlmZmVyZW50IGNvbG9yIHNlcmllcwpjb2wxIDwtIGNvbG9yUmFtcFBhbGV0dGUoYygiIzdGMDAwMCIsICJyZWQiLCAiI0ZGN0YwMCIsICJ5ZWxsb3ciLCAid2hpdGUiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAiY3lhbiIsICIjMDA3RkZGIiwgImJsdWUiLCIjMDAwMDdGIikpCmNvbDIgPC0gY29sb3JSYW1wUGFsZXR0ZShjKCIjNjcwMDFGIiwgIiNCMjE4MkIiLCAiI0Q2NjA0RCIsICIjRjRBNTgyIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIiNGRERCQzciLCAiI0ZGRkZGRiIsICIjRDFFNUYwIiwgIiM5MkM1REUiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAiIzQzOTNDMyIsICIjMjE2NkFDIiwgIiMwNTMwNjEiKSkKY29sMyA8LSBjb2xvclJhbXBQYWxldHRlKGMoInJlZCIsICJ3aGl0ZSIsICJibHVlIikpCmNv
bDQgPC0gY29sb3JSYW1wUGFsZXR0ZShjKCIjN0YwMDAwIiwgInJlZCIsICIjRkY3RjAwIiwgInllbGxvdyIsICIjN0ZGRjdGIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgImN5YW4iLCAiIzAwN0ZGRiIsICJibHVlIiwgIiMwMDAwN0YiKSkKd2IgPC0gYygid2hpdGUiLCAiYmxhY2siKQoKcGFyKGFzayA9IFRSVUUpCgpjb3JycGxvdChNLCBtZXRob2QgPSAibnVtYmVyIiwgY29sID0gImJsYWNrIiwgY2wucG9zID0gIm4iKQpjb3JycGxvdChNLCBtZXRob2QgPSAibnVtYmVyIikKY29ycnBsb3QoTSkKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIikKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgYWRkQ29lZi5jb2wgPSAiZ3JleSIpCmBgYAoKYGBge3J9CmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsIGNvbCA9IGNvbDEoMjApLCBjbC5sZW5ndGggPSAyMSwgYWRkQ29lZi5jb2wgPSAiZ3JleSIpCmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsIGNvbCA9IGNvbDEoMTApLCBhZGRDb2VmLmNvbCA9ICJncmV5IikKCmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsIGNvbCA9IGNvbDIoMjAwKSkKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgY29sID0gY29sMigyMDApLCBhZGRDb2VmLmNvbCA9ICJncmV5IikKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgY29sID0gY29sMigyMCksIGNsLmxlbmd0aCA9IDIxLCBhZGRDb2VmLmNvbCA9ICJncmV5IikKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgY29sID0gY29sMigxMCksIGFkZENvZWYuY29sID0gImdyZXkiKQoKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgY29sID0gY29sMygxMDApKQpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCBjb2wgPSBjb2wzKDEwKSkKYGBgCmBgYHtyfQpjb3JycGxvdChNLCBtZXRob2QgPSAiY29sb3IiLCBjb2wgPSBjb2wxKDIwKSwgY2wubGVuZ3RoID0gMjEsIG9yZGVyID0gIkFPRSIsCiAgICAgICAgIGFkZENvZWYuY29sID0gImdyZXkiKQpjb3JycGxvdChNLCBtZXRob2QgPSAic3F1YXJlIiwgY29sID0gY29sMigyMDApLCBvcmRlciA9ICJBT0UiKQpjb3JycGxvdChNLCBtZXRob2QgPSAiZWxsaXBzZSIsIGNvbCA9IGNvbDEoMjAwKSwgb3JkZXIgPSAiQU9FIikKY29ycnBsb3QoTSwgbWV0aG9kID0gInNoYWRlIiwgY29sID0gY29sMygyMCksIG9yZGVyID0gIkFPRSIpCmNvcnJwbG90KE0sIG1ldGhvZCA9ICJwaWUiLCBvcmRlciA9ICJBT0UiKQpgYGAKCmBgYHtyfQojIyBjb2wgPSB3Ygpjb3JycGxvdChNLCBjb2wgPSB3Yiwgb3JkZXIgPSAiQU9FIiwgb3V0bGluZSA9IFRSVUUsIGNsLnBvcyA9ICJuIikKCiMjIGxpa2UgQ2hpbmVzZSB3aXFpLCBzdWl0IGZvciBlaXRoZXIgb24gc2NyZWVuIG9yIHdoaXRlLWJsYWNrIHByaW50Lgpjb3JycGxvdChNLCBjb2wgPSB3YiwgYmcgPSAiZ29sZDIiLCAgb3JkZXIgPSAiQU9FIiwgY2wucG9zID0gIm4iKQoKYGBgCmBgYHtyfQojIyBtaXhlZCBtZXRob2RzOiBJdCdzIG1vcmUgZWZmaWNpZW50IGlmIHVz
aW5nIGZ1bmN0aW9uICJjb3JycGxvdC5taXhlZCIKIyMgY2lyY2xlICsgZWxsaXBzZQpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCB0eXBlID0gInVwcGVyIiwgdGwucG9zID0gImQiKQpjb3JycGxvdChNLCBhZGQgPSBUUlVFLCB0eXBlID0gImxvd2VyIiwgbWV0aG9kID0gImVsbGlwc2UiLCBvcmRlciA9ICJBT0UiLAogICAgICAgICBkaWFnID0gRkFMU0UsIHRsLnBvcyA9ICJuIiwgY2wucG9zID0gIm4iKQoKIyMgY2lyY2xlICsgc3F1YXJlCmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsdHlwZSA9ICJ1cHBlciIsIHRsLnBvcyA9ICJkIikKY29ycnBsb3QoTSwgYWRkID0gVFJVRSwgdHlwZSA9ICJsb3dlciIsIG1ldGhvZCA9ICJzcXVhcmUiLCBvcmRlciA9ICJBT0UiLAogICAgICAgICBkaWFnID0gRkFMU0UsIHRsLnBvcyA9ICJuIiwgY2wucG9zID0gIm4iKQoKIyMgY2lyY2xlICsgY29sb3JmdWwgbnVtYmVyCmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsIHR5cGUgPSAidXBwZXIiLCB0bC5wb3MgPSAiZCIpCmNvcnJwbG90KE0sIGFkZCA9IFRSVUUsIHR5cGUgPSAibG93ZXIiLCBtZXRob2QgPSAibnVtYmVyIiwgb3JkZXIgPSAiQU9FIiwKICAgICAgICAgZGlhZyA9IEZBTFNFLCB0bC5wb3MgPSAibiIsIGNsLnBvcyA9ICJuIikKCiMjIGNpcmNsZSArIGJsYWNrIG51bWJlcgpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCB0eXBlID0gInVwcGVyIiwgdGwucG9zID0gInRwIikKY29ycnBsb3QoTSwgYWRkID0gVFJVRSwgdHlwZSA9ICJsb3dlciIsIG1ldGhvZCA9ICJudW1iZXIiLCBvcmRlciA9ICJBT0UiLAogICAgICAgICBjb2wgPSAiYmxhY2siLCBkaWFnID0gRkFMU0UsIHRsLnBvcyA9ICJuIiwgY2wucG9zID0gIm4iKQpgYGAKYGBge3J9CiMjIG9yZGVyIGlzIGhjbHVzdCBhbmQgZHJhdyByZWN0YW5nbGVzCmNvcnJwbG90KE0sIG9yZGVyID0gImhjbHVzdCIpCmNvcnJwbG90KE0sIG9yZGVyID0gImhjbHVzdCIsIGFkZHJlY3QgPSAyKQpjb3JycGxvdChNLCBvcmRlciA9ICJoY2x1c3QiLCBhZGRyZWN0ID0gMywgcmVjdC5jb2wgPSAicmVkIikKY29ycnBsb3QoTSwgb3JkZXIgPSAiaGNsdXN0IiwgYWRkcmVjdCA9IDQsIHJlY3QuY29sID0gImJsdWUiKQpjb3JycGxvdChNLCBvcmRlciA9ICJoY2x1c3QiLCBoY2x1c3QubWV0aG9kID0gIndhcmQuRDIiLCBhZGRyZWN0ID0gNCkKYGBgCgpgYGB7cn0KIyMgdmlzdWFsaXplIGEgIG1hdHJpeCBpbiBbMCwgMV0KY29ycnBsb3QoYWJzKE0pLCBvcmRlciA9ICJBT0UiLCBjbC5saW0gPSBjKDAsMSkpCmNvcnJwbG90KGFicyhNKSwgb3JkZXIgPSAiQU9FIiwgY29sID0gY29sMSgyMCksIGNsLmxpbSA9IGMoMCwxKSkKY29ycnBsb3QoYWJzKE0pLCBvcmRlciA9ICJBT0UiLCBjb2wgPSBjb2wzKDIwMCksIGNsLmxpbSA9IGMoMCwxKSkKYGBgCgpgYGB7cn0KIyMgdmlzdWFsaXplIGEgIG1hdHJpeCBpbiBbLTEwMCwgMTAwXQpyYW4gPC0gcm91bmQobWF0cml4KHJ1bmlmKDIyNSwgLTEwMCwxMDAp
LCAxNSkpCmNvcnJwbG90KHJhbiwgaXMuY29yciA9IEZBTFNFKQpjb3JycGxvdChyYW4sIGlzLmNvcnIgPSBGQUxTRSwgY2wubGltID0gYygtMTAwLCAxMDApKQpgYGAKYGBge3J9CiMjIHRleHQtbGFiZWxzIGFuZCBwbG90IHR5cGUKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgdGwuc3J0ID0gNDUpCmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsIHRsLnNydCA9IDYwKQpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCB0bC5wb3MgPSAiZCIsIGNsLnBvcyA9ICJuIikKY29ycnBsb3QoTSwgb3JkZXIgPSAiQU9FIiwgZGlhZyA9IEZBTFNFLCB0bC5wb3MgPSAiZCIpCmNvcnJwbG90KE0sIG9yZGVyID0gIkFPRSIsIHR5cGUgPSAidXBwZXIiKQpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCB0eXBlID0gInVwcGVyIiwgZGlhZyA9IEZBTFNFKQpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCB0eXBlID0gImxvd2VyIiwgY2wucG9zID0gImIiKQpjb3JycGxvdChNLCBvcmRlciA9ICJBT0UiLCB0eXBlID0gImxvd2VyIiwgY2wucG9zID0gImIiLCBkaWFnID0gRkFMU0UpCmBgYAoKYGBge3J9CiMjIGFuIGFuaW1hdGlvbiBvZiBjaGFuZ2luZyBjb25maWRlbmNlIGludGVydmFsIGluIGRpZmZlcmVudCBzaWduaWZpY2FuY2UgbGV2ZWwKIyMgYmVnaW4uYW5pbWF0b24KcGFyKGFzayA9IEZBTFNFKQpmb3IgKGkgaW4gc2VxKDAuMSwgMCwgLTAuMDA1KSkgewogIHRtcCA8LSBjb3IubXRlc3QobXRjYXJzLCBjb25mLmxldmVsID0gMSAtIGkpCiAgY29ycnBsb3QoTSwgcC5tYXQgPSB0bXAkcCwgbG93ID0gdG1wJGxvd0NJLCB1cHAgPSB0bXAkdXBwQ0ksIG9yZGVyID0gImhjbHVzdCIsCiAgICAgICAgICAgcGNoLmNvbCA9ICJyZWQiLCBzaWcubGV2ZWwgPSBpLCBwbG90Q0kgPSAicmVjdCIsIGNsLnBvcyA9ICJuIiwKICAgICAgICAgICBtYXIgPSBjKDAsIDAsIDEsIDApLAogICAgICAgICAgIHRpdGxlID0gc3Vic3RpdHV0ZShhbHBoYSA9PSB4LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsaXN0KHggPSBmb3JtYXQoaSwgZGlnaXRzID0gMywgbnNtYWxsID0gMykpKSkKICBTeXMuc2xlZXAoMC4xNSkKfQojIyBlbmQuYW5pbWF0b24KYGBgCgo=